from datasets import load_dataset,load_from_disk

from 第二节课.test_download import cache_dir

# Fetch the ChnSentiCorp dataset by its Hub id and show a summary.
# NOTE(review): the cache directory is spelled "dada/" -- possibly meant
# to be "data/"; confirm before changing, since it decides where files land.
dataset = load_dataset(
    path="lansinuote/ChnSentiCorp",
    cache_dir="dada/",
)
print(dataset)

# Rebind `dataset` to whatever is stored at this local directory, replacing
# the object loaded above, and show its summary as well.
dataset = load_from_disk(r"D:\JSON文件所在位置")
print(dataset)

# Export to CSV.
# `dataset` is indexed by split name further down this script, i.e. it is a
# DatasetDict. DatasetDict has no to_csv()/to_json(); those exporters live on
# the individual Dataset splits, so the training split is exported here.
dataset["train"].to_csv(path_or_buf=r"D:\JSON文件所在位置\data.csv")
# Export to JSON (the keyword was previously misspelled as `ath_or_buf`,
# which raises TypeError before anything is written).
dataset["train"].to_json(path_or_buf=r"D:\JSON文件所在位置\data.jsonl")

# Training split: print the split summary, then its first record.
train_dataset = dataset["train"]
print(train_dataset)
print(train_dataset[0])

# Validation split: same inspection as above.
validation_dataset = dataset["validation"]
print(validation_dataset)
print(validation_dataset[0])

# Test split: same inspection as above.
test_dataset = dataset["test"]
print(test_dataset)
print(test_dataset[0])

# Extension: load a local CSV file through the generic "csv" loader and
# print what comes back.
csv_dataset = load_dataset(
    path="csv",
    data_files=r"D:\....\data.csv",
)
print(csv_dataset)

# Extension: load a local JSON file through the "json" loader.
# The result is not bound to a name here.
load_dataset(
    path="json",
    data_files="data.jsonl",
)

# Extension: load a local plain-text file through the "text" loader.
# The result is not bound to a name here either.
load_dataset(
    path="text",
    data_files="data.txt",
)

# 扩
